import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression as LR

# Fit a one-feature linear regression (area -> price) with scikit-learn and
# plot the fitted line over the training points.

# Load the data set; the workbook must provide the two columns read below.
data = pd.read_excel('./data/house_price.xlsx')

# The plot labels below contain Chinese text, so select the SimHei font
# (assumes SimHei is installed on the machine running the script).
plt.rcParams['font.sans-serif'] = 'SimHei'
# Work around the minus sign "-" being displayed incorrectly.
plt.rcParams['axes.unicode_minus'] = False

# Feature (area column) and target (price column) as NumPy arrays.
x = data.loc[:, '面积（㎡）'].values
y = data.loc[:, '价格（w）'].values

# scikit-learn implementation.
lr = LR()
# Estimators expect a 2-D feature matrix of shape (n_samples, n_features),
# so turn the 1-D area array into a single-column matrix.
x_2d = x.reshape(-1, 1)
lr.fit(x_2d, y)
# Predict on exactly the inputs that are plotted on the x-axis below.
# Predicting on shifted inputs (e.g. ``x_2d - 1``) would draw a line that is
# offset from the fitted model by one slope-unit.
y_sklearn = lr.predict(x_2d)

# Training points in blue, fitted regression line in red.
plt.scatter(x, y, color='blue', label='训练数据')
plt.plot(x_2d, y_sklearn, color='red', label='scikit-learn线性模型回归曲线')
plt.title('房屋面积-价格图')
plt.xlabel('面积（㎡）')
plt.ylabel('价格（w）')
plt.legend()
plt.show()